#!/usr/bin/env bash
#
# Launch a vLLM OpenAI-compatible server for the DeepSeek-R1-Distill-Llama-70B
# checkpoint stored under /mnt/nvme1n1, listening on 0.0.0.0:8000 and
# exposed to clients under the model name "deepseek-llama70".
#
# Usage: run this file with bash; it takes no arguments.

# Timeouts passed to vLLM through the environment.
# NOTE(review): the "_S" suffix suggests seconds (36000 s = 10 h); the RPC
# timeout is presumably milliseconds (36000000 ms = 10 h) -- confirm both
# against the environment-variable docs of the installed vLLM version.
export VLLM_ENGINE_ITERATION_TIMEOUT_S=36000
export VLLM_RPC_TIMEOUT=36000000
# NOTE(review): confirm that the installed vLLM build actually reads this
# variable; it is exported unchanged from the original script.
export VLLM_ENFORCE_CUDA_GRAPH=1

# The context-length limit is written as 16 * 1024 (= 16384) so the "16K"
# intent stays visible. Standard $(( )) arithmetic is used here instead of the
# deprecated bash-only $[ ] form.
vllm serve /mnt/nvme1n1/DeepSeek-R1-Distill-Llama-70B \
  --served_model_name "deepseek-llama70" \
  -tp 8 \
  --max-model-len "$((16 * 1024))" \
  --enable_chunked_prefill \
  --enable_prefix_caching \
  --trust-remote-code \
  --host 0.0.0.0 \
  --port 8000
